****** Emerging Researchers NSF Project 
****** Gender Discrepancies in Publication Productivity of High-Performing Life Science Graduate Students 
****** Data Code: Descriptive Statistics & Analysis
****** Built July 2016 - Sept 2018, Public Version Created July 2019 

********************************************************************************

*** Open Part 1 Wide-Form Data File (Part1_WideData)
** Set Directory Path: define global dir before uncommenting any export line; Part 1 exports write to "$dir/Tables"

********************************************************************************

*** Part 1: Descriptive Statistics

** Globals
* Named variable lists for the descriptive tables; the estpost sum calls below expand all five, in this order.
global totalpub everpub tot_pub_16 tot_pub_5
global pistats prior_pub_dum year_prop_p award_p female_p advmatch ///
    field_1_p field_2_p field_3_p
global placement complete_phd_p time_p research_p foc_academic_p postdoc_p
global gradstats g_d_uni_public_p g_d_avg_pubs_per_fac_p nrc_female_fac_pct ///
    g_d_grfp_total2yr_p nrc_avg_GRE g_flagship_p g_landgrnt_p ///
    rank1 rank2 rank3 size1 size2 size3 size4
global baccstats bac_public bac_US_domestic bac_liberal_arts ///
    bac_carnegie_R1 bac_carnegie_R2 bac_carnegie_uni bac_carnegie_ProfSch
	
** Table A2: Full Sample CCA Descriptive Statistics 
* The same variable list is summarized in every column, so it is assembled once.
local descvars $totalpub $pistats $placement $gradstats $baccstats
foreach miss in missB { 
	* C1: Full Sample 
	eststo clear 
	eststo: estpost sum `descvars' if `miss' == 0
	*esttab using "$dir/Tables/DescriptiveStats_`miss'.csv", label title(Descriptive Statistics) mtitle ("Full Sample") cells("count(fmt(0)) mean(fmt(3)) sd(fmt(3)) min(fmt(3)) max(fmt(3))") replace plain
	* C2 & C3: Female Awardees & Honorable Mentions (fem == 1), then
	* C4 & C5: Male Awardees & Honorable Mentions (fem == 0); one stored estimate per award_p group
	foreach fem in 1 0 {
		eststo clear
		bys award_p: eststo: estpost sum `descvars' if (female_p == `fem' & `miss' == 0)
		*esttab using "$dir/Tables/DescriptiveStats_`miss'.csv", label title(Descriptive Statistics) cells("count(fmt(0)) mean(fmt(3)) sd(fmt(3))") append plain
	}
}

** Table 1: Coarsened CCA Sample Descriptive Statistics
* Same layout as the full-sample table, restricted throughout to cem_matched == 1.
local descvars $totalpub $pistats $placement $gradstats $baccstats
foreach miss in missB { 
	local insample cem_matched == 1 & `miss' == 0
	* C1: Full Sample 
	eststo clear
	eststo: estpost sum `descvars' if (`insample')
	*esttab using "$dir/Tables/DescriptiveStats_cem_`miss'.csv", label title(Descriptive Statistics) mtitle ("CEM Sample") cells("count(fmt(0)) mean(fmt(3)) sd(fmt(3)) min(fmt(3)) max(fmt(3))") replace plain 
	* C2 & C3: Female Awardees & Honorable Mentions (fem == 1), then
	* C4 & C5: Male Awardees & Honorable Mentions (fem == 0); one stored estimate per award_p group
	foreach fem in 1 0 {
		eststo clear
		bys award_p: eststo: estpost sum `descvars' if (female_p == `fem' & `insample')
		*esttab using "$dir/Tables/DescriptiveStats_cem_`miss'.csv", label title(Descriptive Statistics) cells("count(fmt(0)) mean(fmt(3)) sd(fmt(3))") append plain 
	}
}

********************************************************************************
********************************************************************************

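*** Open Part 2 Long-Form Data File (Part2_LongData)
** Set Directory Path: define global dir before uncommenting any export line; Parts 2-5 exports write to "$dir/Tables" and "$dir/Figures"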

********************************************************************************

*** Part 2: Additional Descriptives   

** A: Additional Statistics 
* New Publications
* For each sample restriction (CEM-matched first, then unrestricted), summarize the two
* new-publication measures overall and then within each award_p x female_p cell.
local newpubs new_pub new_pub_moveavg3
foreach cond in "missB == 0 & cem_matched == 1" "missB == 0" {
	sum `newpubs' if `cond'
	bys award_p female_p: sum `newpubs' if `cond'
}
* Pre-Trend Diagnostics
* For each missingness flag, run the four pre-period comparisons of tot_pub first as
* t-tests and then as Kolmogorov-Smirnov tests: gender gap among awardees and award gap
* among women, each at year 5 alone and pooled over years below 6.
foreach miss in missA missB {
	foreach test in ttest ksmirnov {
		`test' tot_pub if year == 5 & `miss' == 0 & award_p == 1, by(female_p)
		`test' tot_pub if year < 6 & `miss' == 0 & award_p == 1, by(female_p)
		`test' tot_pub if year == 5 & `miss' == 0 & female_p == 1, by(award_p)
		`test' tot_pub if year < 6 & `miss' == 0 & female_p == 1, by(award_p)
	}
}
* Compare Male HM to Female HM in Post Period 
* One t-test of tot_pub by gender per year 7 through 16, among award_p == 0 only.
foreach miss in missA missB {
	forvalues yr = 7/16 {
		ttest tot_pub if year == `yr' & `miss' == 0 & award_p == 0, by(female_p)
	}
}
* Compare Male HM to Female A in Post Period 
* One t-test of tot_pub by gender per year 7 through 16, within the fa_mhm == 1 subsample.
foreach miss in missA missB {
	forvalues yr = 7/16 {
		ttest tot_pub if year == `yr' & `miss' == 0 & fa_mhm == 1, by(female_p)
	}
}
	
** B: Power Tests
* powerreg is a user-written command, not part of official Stata; it must be installed before this section runs.
* NOTE(review): options are read here as r2f() = R-squared of the full model, r2r() = R-squared of the reduced model,
* nvar() = number of variables in the full model, ntest() = number of variables tested - confirm against the powerreg help file.
* Calls that pass power() leave n() unspecified; calls that pass n() leave power() unspecified.
* Primary Model: reg tot_pub $ThresDDD $picontrols i.year_prop_p $gradcontrols i.g_d_R3_mid i.g_d_ProgramSize_Q_p $bacontrols i.baccategory_cd if cem_matched == 1, cluster(pi_id)
* Use High R-squareds and difference with addition of single treatment variable
powerreg, r2f(0.40) r2r(0.35) nvar(39) ntest(1) power(.9)	
powerreg, r2f(0.40) r2r(0.39) nvar(39) ntest(1) power(.9)	
* Use actual R-squareds from baseline model with addition of single treatment variable
* The R-squared values and sample sizes below are entered by hand, not read from stored estimation results.
powerreg, r2f(0.368) r2r(0.366) nvar(39) ntest(1) n(8460)	
powerreg, r2f(0.368) r2r(0.366) nvar(39) ntest(1) n(4512)	
* Advisor Model: reg tot_adv $awardthresdd $picontrols i.year_prop_p $gradcontrols i.g_d_R3_mid i.g_d_ProgramSize_Q_p $bacontrols i.baccategory_cd if cem_matched == 1 & award_p == 0 & bin_advcite16 == 1, cluster(pi_id)
* Use High R-squareds and difference with addition of single treatment variable
powerreg, r2f(0.40) r2r(0.35) nvar(30) ntest(1) power(.9)	
* Use actual R-squareds from baseline model with addition of single treatment variable
powerreg, r2f(0.368) r2r(0.361) nvar(30) ntest(1) n(1080)	
	
** C: Figures of Baseline DDD Graphs in Counts
* All five figures draw four connected series against year with the same colors, markers,
* x-axis labels, legend, and dashed reference line at year 6. Those shared pieces are stored
* in locals, so run this section as a unit; only the series and the y-axis differ per figure.
local cutoff xline(6, lpattern(longdash) lcolor(gs10))
local xaxis xlabel(1 "-5" 6 "0" 11 "5" 16 "10") xtitle("Year") graphregion(fcolor(white))
local groups legend(label(1 "Male Awardee") label(2 "Female Honorable Mention") label(3 "Female Awardee") label(4 "Male Honorable Mention"))
* Figure 1 Panel A: Coarsened Sample Annual Average Total Publications by Gender & Award Status (N = 707)
twoway (connected cpub_amA year, mcolor(gold) lcolor(gold) msymbol(s)) ///
       (connected cpub_hfA year, mcolor(navy) lcolor(navy) msymbol(t)) ///
       (connected cpub_afA year, mcolor(teal) lcolor(teal) msymbol(o)) ///
       (connected cpub_hmA year, mcolor(black) lcolor(black) msymbol(x) `cutoff'), ///
    yscale(range(0 9)) ylabel(0(2)9) ytitle("Number of Total Publications") `xaxis' ///
    `groups'
	*graph save Graph "$dir/Figures/DDD_cem_totpub.gph", replace 
* Figure 1 Panel B: Coarsened Sample Annual Average New Publications by Gender & Award Status (N = 707)
twoway (connected cnewpub_amA year, mcolor(gold) lcolor(gold) msymbol(s)) ///
       (connected cnewpub_hfA year, mcolor(navy) lcolor(navy) msymbol(t)) ///
       (connected cnewpub_afA year, mcolor(teal) lcolor(teal) msymbol(o)) ///
       (connected cnewpub_hmA year, mcolor(black) lcolor(black) msymbol(x) `cutoff'), ///
    yscale(range(0 1.25)) ylabel(0(.25)1.25) ytitle("Number of New Publications") `xaxis' ///
    `groups'
	*graph save Graph "$dir/Figures/DDD_cem_newpub.gph", replace 
* Figure A1 Panel A: Full Sample Annual Average Total Publications by Gender & Award Status (N = 873)
twoway (connected pub_amA year, mcolor(gold) lcolor(gold) msymbol(s)) ///
       (connected pub_hfA year, mcolor(navy) lcolor(navy) msymbol(t)) ///
       (connected pub_afA year, mcolor(teal) lcolor(teal) msymbol(o)) ///
       (connected pub_hmA year, mcolor(black) lcolor(black) msymbol(x) `cutoff'), ///
    yscale(range(0 10)) ylabel(0(2)10) ytitle("Number of Total Publications") `xaxis' ///
    `groups'
	*graph save Graph "$dir/Figures/DDD_full_totpub.gph", replace 
* Figure A1 Panel B: Full Sample Annual Average New Publications by Gender & Award Status (N = 873)
twoway (connected newpub_amA year, mcolor(gold) lcolor(gold) msymbol(s)) ///
       (connected newpub_hfA year, mcolor(navy) lcolor(navy) msymbol(t)) ///
       (connected newpub_afA year, mcolor(teal) lcolor(teal) msymbol(o)) ///
       (connected newpub_hmA year, mcolor(black) lcolor(black) msymbol(x) `cutoff'), ///
    yscale(range(0 1.25)) ylabel(0(.25)1.25) ytitle("Number of New Publications") `xaxis' ///
    `groups'
	*graph save Graph "$dir/Figures/DDD_full_newpub.gph", replace 
* Figure A2: Coarsened Sample Proportion of Any Publications by Gender & Award Status (N = 707)
twoway (connected cany_amA year, mcolor(gold) lcolor(gold) msymbol(s)) ///
       (connected cany_hfA year, mcolor(navy) lcolor(navy) msymbol(t)) ///
       (connected cany_afA year, mcolor(teal) lcolor(teal) msymbol(o)) ///
       (connected cany_hmA year, mcolor(black) lcolor(black) msymbol(x) `cutoff'), ///
    yscale(range(0 1)) ylabel(0(.1)1) ytitle("Any Publications") `xaxis' ///
    `groups'
	*graph save Graph "$dir/Figures/DDD_cem_anypub.gph", replace 

********************************************************************************

*** Part 3: Primary Analysis 

** Globals
* Triple-difference term lists: threshold-based (full timeframe) and post-period-based (abbreviated timeframe)
global ThresDDD thres_female_award award_female thres_female thres_award female_p award_p threseff
global DDD ddd award_post award_female post_female award_p female_p postpd
* Difference-in-differences term lists for the stratified models
* Within gender (award contrast): post-period and threshold versions
global genderdd award_post award_p postpd
global genderthresdd thres_award award_p threseff
* Within award status (gender contrast): post-period and threshold versions
global awarddd post_female female_p postpd
global awardthresdd thres_female female_p threseff
* Control variable lists shared by the regressions in Parts 3-5
global picontrols field_2_p field_3_p prior_pub_dum advmatch
global gradcontrols g_d_uni_public g_d_avg_pubs_per_fac_p nrc_female_fac_pct ///
    g_d_grfp_total2yr_p nrc_avg_GRE g_flagship_p g_landgrnt_p
global bacontrols bac_public bac_US_domestic

** Table 2: OLS DDD Estimation Results
* Covariates common to both specifications; standard errors are clustered on pi_id throughout.
local controls $picontrols i.year_prop_p $gradcontrols i.g_d_R3_mid i.g_d_ProgramSize_Q_p $bacontrols i.baccategory_cd
foreach outcome in tot_pub new_pub_moveavg3 {
	* Columns 1 & 2: Total Publications & New Publications
	* Threshold-based DDD terms, all years of the CEM-matched sample
	quietly reg `outcome' $ThresDDD `controls' if cem_matched == 1, cluster(pi_id)
	*outreg2 using "$dir/Tables/PanelDDD.xls", append dec(3) e(r2_a) ctitle(OLS DDD, `outcome', Full Timeframe) label addtext(Clustered by PI, Yes, Sample, CEM Full Timeframe, Cutoff, Threshold Year Omitted, Controls, Yes)
	* Abbreviated Timeframe
	* Post-period DDD terms, keeping only years below 6 or above 11
	quietly reg `outcome' $DDD `controls' if cem_matched == 1 & (year < 6 | year > 11), cluster(pi_id)
	*outreg2 using "$dir/Tables/PanelDDD.xls", append dec(3) e(r2_a) ctitle(OLS DDD, `outcome', Five Prior and Post Years) label addtext(Clustered by PI, Yes, Sample, CEM Abbreviated Timeframe, Cutoff, Threshold Year Omitted, Controls, Yes)
}

** Table 3: Stratified Panel OLS DD Estimation Results
* Minimal regression that appears to exist only so the (commented-out) outreg2 call can reset the output file
quietly reg tot_pub award_p
	*outreg2 using "$dir/Tables/Strat_Full.xls", replace
* Shared covariates and the sample of female awardees plus male honorable mentions
local controls $picontrols i.year_prop_p $gradcontrols i.g_d_R3_mid i.g_d_ProgramSize_Q_p $bacontrols i.baccategory_cd
local fa_or_mhm ((female_p == 1 & award_p == 1) | (female_p == 0 & award_p == 0))
foreach outcome in tot_pub new_pub_moveavg3 {
	* Within Gender (Columns 1, 2, 6, & 7)
	quietly reg `outcome' $genderthresdd `controls' if cem_matched == 1 & female_p == 1, cluster(pi_id)
	*outreg2 using "$dir/Tables/Strat_Full.xls", append dec(3) e(r2_a) ctitle(OLS Female DD, `outcome', Full Timeframe) label addtext(Clustered by PI, Yes, Sample, CEM Female, Cutoff, Threshold Year Omitted, Controls, Yes)
	quietly reg `outcome' $genderthresdd `controls' if cem_matched == 1 & female_p == 0, cluster(pi_id)
	*outreg2 using "$dir/Tables/Strat_Full.xls", append dec(3) e(r2_a) ctitle(OLS Male DD, `outcome', Full Timeframe) label addtext(Clustered by PI, Yes, Sample, CEM Male, Cutoff, Threshold Year Omitted, Controls, Yes)
	* Within Award (Columns 3, 4, 8, & 9)
	quietly reg `outcome' $awardthresdd `controls' if cem_matched == 1 & award_p == 1, cluster(pi_id)
	*outreg2 using "$dir/Tables/Strat_Full.xls", append dec(3) e(r2_a) ctitle(OLS Awardee DD, `outcome', Full Timeframe) label addtext(Clustered by PI, Yes, Sample, CEM Awardee, Cutoff, Threshold Year Omitted, Controls, Yes)
	quietly reg `outcome' $awardthresdd `controls' if cem_matched == 1 & award_p == 0, cluster(pi_id)
	*outreg2 using "$dir/Tables/Strat_Full.xls", append dec(3) e(r2_a) ctitle(OLS HM DD, `outcome', Full Timeframe) label addtext(Clustered by PI, Yes, Sample, CEM HM, Cutoff, Threshold Year Omitted, Controls, Yes)
	* Across Gender & Award (Columns 5 & 10)
	quietly reg `outcome' award_female `controls' if cem_matched == 1 & `fa_or_mhm', cluster(pi_id)
	*outreg2 using "$dir/Tables/Strat_Full.xls", append dec(3) e(r2_a) ctitle(OLS FA/MHM DD, `outcome', Full Timeframe) label addtext(Clustered by PI, Yes, Sample, CEM Awardee, Cutoff, None, Controls, Yes)
}

** Stratified Two-Period OLS DD Estimation Results with Varying Post Periods for CEM Sample
	* These results were used to populate Table A6 and Figures 2 and 3 
* Minimal regression that appears to exist only so the (commented-out) outreg2 call can reset the output file
quietly reg tot_pub award_p
	*outreg2 using "$dir/Tables/Strat_TwoPd.xls", replace 
* Shared covariates and the sample of female awardees plus male honorable mentions
local controls $picontrols i.year_prop_p $gradcontrols i.g_d_R3_mid i.g_d_ProgramSize_Q_p $bacontrols i.baccategory_cd
local fa_or_mhm ((female_p == 1 & award_p == 1) | (female_p == 0 & award_p == 0))
foreach outcome in tot_pub new_pub_moveavg3 {
	* Each pass keeps two years only: year 5 and one later year i (7 through 16)
	forvalues i = 7/16 {
		local twoyears (year == 5 | year == `i')
		* Within Gender 
		quietly reg `outcome' $genderdd `controls' if cem_matched == 1 & female_p == 1 & `twoyears', cluster(pi_id)
		*outreg2 using "$dir/Tables/Strat_TwoPd.xls", append dec(3) e(r2_a) ctitle(OLS Female DD, `outcome', Post Year `i') label addtext(Clustered by PI, Yes, Sample, CEM Female, Cutoff, GRFP/Threshold Year Omitted, Controls, Yes)
		quietly reg `outcome' $genderdd `controls' if cem_matched == 1 & female_p == 0 & `twoyears', cluster(pi_id)
		*outreg2 using "$dir/Tables/Strat_TwoPd.xls", append dec(3) e(r2_a) ctitle(OLS Male DD, `outcome', Post Year `i') label addtext(Clustered by PI, Yes, Sample, CEM Male, Cutoff, GRFP/Threshold Year Omitted, Controls, Yes)
		* Within Award
		quietly reg `outcome' $awarddd `controls' if cem_matched == 1 & award_p == 1 & `twoyears', cluster(pi_id)
		*outreg2 using "$dir/Tables/Strat_TwoPd.xls", append dec(3) e(r2_a) ctitle(OLS Awardee DD, `outcome', Post Year `i') label addtext(Clustered by PI, Yes, Sample, CEM Awardee, Cutoff, GRFP/Threshold Year Omitted, Controls, Yes)
		quietly reg `outcome' $awarddd `controls' if cem_matched == 1 & award_p == 0 & `twoyears', cluster(pi_id)
		*outreg2 using "$dir/Tables/Strat_TwoPd.xls", append dec(3) e(r2_a) ctitle(OLS HM DD, `outcome', Post Year `i') label addtext(Clustered by PI, Yes, Sample, CEM HM, Cutoff, GRFP/Threshold Year Omitted, Controls, Yes)
		* Across Gender & Award
		quietly reg `outcome' award_female `controls' if cem_matched == 1 & `fa_or_mhm' & `twoyears', cluster(pi_id)
		*outreg2 using "$dir/Tables/Strat_TwoPd.xls", append dec(3) e(r2_a) ctitle(OLS FA/MHM DD, `outcome', Post Year `i') label addtext(Clustered by PI, Yes, Sample, CEM Awardee, Cutoff, None, Controls, Yes)
	}
}

********************************************************************************

*** Part 4: Robustness & Sensitivity Analyses	

** Table A5 Panel A: Clustered by Year - OLS DDD Estimation Robustness Analyses on Total Publications
* OLS DDD Estimations Clustered by Year
* Minimal regression that appears to exist only so the (commented-out) outreg2 call can reset the output file
quietly reg tot_pub award_p
	*outreg2 using "$dir/Tables/PanelDDD_YearCluster.xls", replace
* Same specifications as the primary and stratified models, with standard errors clustered on year
local controls $picontrols i.year_prop_p $gradcontrols i.g_d_R3_mid i.g_d_ProgramSize_Q_p $bacontrols i.baccategory_cd
local fa_or_mhm ((female_p == 1 & award_p == 1) | (female_p == 0 & award_p == 0))
foreach outcome in tot_pub {
	quietly reg `outcome' $ThresDDD `controls' if cem_matched == 1, cluster(year)
	*outreg2 using "$dir/Tables/PanelDDD_YearCluster.xls", append dec(3) e(r2_a) ctitle(OLS DDD, `outcome', Full Timeframe) label addtext(Clustered, Year, Sample, CEM Full Timeframe, Cutoff, Threshold Year Omitted, Controls, Yes)
	* Within Gender DD Estimations 
	quietly reg `outcome' $genderthresdd `controls' if cem_matched == 1 & female_p == 1, cluster(year)
	*outreg2 using "$dir/Tables/PanelDDD_YearCluster.xls", append dec(3) e(r2_a) ctitle(OLS Female DD, `outcome', Full Timeframe) label addtext(Clustered, Year, Sample, CEM Female, Cutoff, Threshold Year Omitted, Controls, Yes)
	quietly reg `outcome' $genderthresdd `controls' if cem_matched == 1 & female_p == 0, cluster(year)
	*outreg2 using "$dir/Tables/PanelDDD_YearCluster.xls", append dec(3) e(r2_a) ctitle(OLS Male DD, `outcome', Full Timeframe) label addtext(Clustered, Year, Sample, CEM Male, Cutoff, Threshold Year Omitted, Controls, Yes)
	* Within Award Status DD Estimations
	quietly reg `outcome' $awardthresdd `controls' if cem_matched == 1 & award_p == 1, cluster(year)
	*outreg2 using "$dir/Tables/PanelDDD_YearCluster.xls", append dec(3) e(r2_a) ctitle(OLS Awardee DD, `outcome', Full Timeframe) label addtext(Clustered, Year, Sample, CEM Awardee, Cutoff, Threshold Year Omitted, Controls, Yes)
	quietly reg `outcome' $awardthresdd `controls' if cem_matched == 1 & award_p == 0, cluster(year)
	*outreg2 using "$dir/Tables/PanelDDD_YearCluster.xls", append dec(3) e(r2_a) ctitle(OLS HM DD, `outcome', Full Timeframe) label addtext(Clustered, Year, Sample, CEM HM, Cutoff, Threshold Year Omitted, Controls, Yes)
	* Female Awardees versus Male Honorable Mentions
	quietly reg `outcome' award_female `controls' if cem_matched == 1 & `fa_or_mhm', cluster(year)
	*outreg2 using "$dir/Tables/PanelDDD_YearCluster.xls", append dec(3) e(r2_a) ctitle(OLS FA/MHM DD, `outcome', Full Timeframe) label addtext(Clustered, Year, Sample, CEM Awardee, Cutoff, None, Controls, Yes)
}

** Table A5 Panel B: Fixed Effects - OLS DDD Estimation Robustness Analyses on Total Publications
* Declare the panel (unit pi_id, time year), then fit within-unit models with year dummies and no other controls.
* NOTE(review): any regressor that is constant within pi_id is dropped by the fe estimator - confirm which terms survive.
xtset pi_id year 
local fespec i.year if cem_matched == 1
* Baseline DDD FE Model
xtreg tot_pub $ThresDDD `fespec', fe
	*outreg2 using "$dir/Tables/FE_DDD.xls", append dec(3) e(ll r2_a rmse) ctitle(FE DDD, Total Publications, Full Timeframe) label addtext(Fixed Effects, Yes, Sample, CEM Full Timeframe, Cutoff, Threshold Year Omitted, Controls, No)
* Stratified DD Within Gender
xtreg tot_pub $genderthresdd `fespec' & female_p == 1, fe
	*outreg2 using "$dir/Tables/FE_DDD.xls", append dec(3) e(ll r2_a rmse) ctitle(FE Female DDD, Total Publications, Full Timeframe) label addtext(Fixed Effects, Yes, Sample, CEM Female Full Timeframe, Cutoff, Threshold Year Omitted, Controls, No)
xtreg tot_pub $genderthresdd `fespec' & female_p == 0, fe
	*outreg2 using "$dir/Tables/FE_DDD.xls", append dec(3) e(ll r2_a rmse) ctitle(FE Male DDD, Total Publications, Full Timeframe) label addtext(Fixed Effects, Yes, Sample, CEM Male Full Timeframe, Cutoff, Threshold Year Omitted, Controls, No)
* Stratified DD Within Award Status
xtreg tot_pub $awardthresdd `fespec' & award_p == 1, fe
	*outreg2 using "$dir/Tables/FE_DDD.xls", append dec(3) e(ll r2_a rmse) ctitle(FE Awardee DDD, Total Publications, Full Timeframe) label addtext(Fixed Effects, Yes, Sample, CEM Awardee Full Timeframe, Cutoff, Threshold Year Omitted, Controls, No)
xtreg tot_pub $awardthresdd `fespec' & award_p == 0, fe
	*outreg2 using "$dir/Tables/FE_DDD.xls", append dec(3) e(ll r2_a rmse) ctitle(FE HM DDD, Total Publications, Full Timeframe) label addtext(Fixed Effects, Yes, Sample, CEM HM Full Timeframe, Cutoff, Threshold Year Omitted, Controls, No)

** Table B3: Comparison of Primary Model Across Various CEM Samples
* Estimate Primary Model Compared Across CEM Samples 
* Constant-only regression that appears to exist only so the (commented-out) outreg2 call can reset the output file
quietly reg tot_pub 
	*outreg2 using "$dir/Tables/CEMsamples.xls", replace
* Refit the primary DDD specification once per matched-sample indicator
local controls $picontrols i.year_prop_p $gradcontrols i.g_d_R3_mid i.g_d_ProgramSize_Q_p $bacontrols i.baccategory_cd
foreach sample in cem_matched cem_altsample1 cem_altsample2 cem_altsample3 cem_altsample4 {
	quietly reg tot_pub $ThresDDD `controls' if `sample' == 1, cluster(pi_id)
	*outreg2 using "$dir/Tables/CEMsamples.xls", append dec(3) e(r2_a) ctitle(OLS DDD, tot_pub, `sample') label addtext(Clustered by PI, Yes, Sample, CEM `sample' Full Timeframe, Cutoff, Threshold Year Omitted, Controls, Yes)
}

** Table A7: OLS DDD Estimation of Broader Impacts Policy Change
* Broader Impacts Sample Comparisons 
* Compares the mean of female_p between the two broaderimpacts groups: all observations, then award_p == 1, then award_p == 0
ttest female_p, by(broaderimpacts)
ttest female_p if award_p == 1, by(broaderimpacts)
ttest female_p if award_p == 0, by(broaderimpacts)	
* Restricted Broader Impacts Sample on Primary Estimation  
* Primary DDD specification on tot_pub, fit three times: full CEM sample, broaderimpacts == 1, broaderimpacts == 0
* The export lines name tot_pub explicitly in ctitle() because no loop in this section defines an outcome macro
reg tot_pub $ThresDDD $picontrols i.year_prop_p $gradcontrols i.g_d_R3_mid i.g_d_ProgramSize_Q_p $bacontrols i.baccategory_cd if cem_matched == 1, cluster(pi_id)
	*outreg2 using "$dir/Tables/broaderimpacts.xls", replace dec(3) e(r2_a) ctitle(OLS DDD, tot_pub, Full Timeframe) label addtext(Clustered by PI, Yes, Sample, CEM Full Timeframe, Cutoff, Threshold Year Omitted, Controls, Yes)
reg tot_pub $ThresDDD $picontrols i.year_prop_p $gradcontrols i.g_d_R3_mid i.g_d_ProgramSize_Q_p $bacontrols i.baccategory_cd if cem_matched == 1 & broaderimpacts == 1, cluster(pi_id)
	*outreg2 using "$dir/Tables/broaderimpacts.xls", append dec(3) e(r2_a) ctitle(OLS DDD, tot_pub, BI Timeframe) label addtext(Clustered by PI, Yes, Sample, CEM BI Timeframe, Cutoff, Threshold Year Omitted, Controls, Yes)
reg tot_pub $ThresDDD $picontrols i.year_prop_p $gradcontrols i.g_d_R3_mid i.g_d_ProgramSize_Q_p $bacontrols i.baccategory_cd if cem_matched == 1 & broaderimpacts == 0, cluster(pi_id)
	*outreg2 using "$dir/Tables/broaderimpacts.xls", append dec(3) e(r2_a) ctitle(OLS DDD, tot_pub, Pre-BI Timeframe) label addtext(Clustered by PI, Yes, Sample, CEM Pre-BI Timeframe, Cutoff, Threshold Year Omitted, Controls, Yes)

********************************************************************************

*** Part 5: Exploratory Mechanisms 
		
** Table 4: Student-Advisor Joint Publications by Advisor Productivity and Gender 
* Advisor Citations Exploratory Analysis
* Works on a temporary CEM-matched copy of the data; restore brings the full data back afterwards.
preserve
keep if cem_matched == 1  
* Dyad groupings, labelled as in the original section headings (male vs. female advisors)
local maleadv (dyad == 2 | dyad == 4)
local femaleadv (dyad == 1 | dyad == 3)
* Outer loop: Advisor Impact Measured at Year 16, then Advisor Impact Measured at Year 11
foreach cite in bin_advcite16 bin_advcite11 {
	* T-test Joint Publications with Male Advisors, then Female Advisors, by Gender and Advisor Impact 
	foreach adv in maleadv femaleadv {
		* Impact flag 1 then 0: everyone, awardees only, honorable mentions only
		foreach hi in 1 0 {
			ttest tot_adv if ``adv'' & `cite' == `hi', by(female_p)
			foreach aw in 1 0 {
				ttest tot_adv if ``adv'' & `cite' == `hi' & award_p == `aw', by(female_p)
			}
		}
		* Impact flag 1 then 0 within the fa_mhm == 1 subsample
		foreach hi in 1 0 {
			ttest tot_adv if ``adv'' & `cite' == `hi' & fa_mhm == 1, by(female_p)
		}
	}
	* T-test Joint Publications with Advisors by Award within Genders and Impact 	
	* female_p == 1 first, then female_p == 0; within each, male then female advisors, impact flag 1 then 0
	foreach fem in 1 0 {
		foreach adv in maleadv femaleadv {
			foreach hi in 1 0 {
				ttest tot_adv if ``adv'' & `cite' == `hi' & female_p == `fem', by(award_p)
			}
		}
	}
}
restore 

** Table 5: OLS DDD Estimation Results by First-Placement 
* DDD Estimations 
* Minimal regression that appears to exist only so the (commented-out) outreg2 call can reset the output file
quietly reg tot_pub award_p
	*outreg2 using "$dir/Tables/PanelDDD_Placement.xls", replace 
* Primary DDD specification refit within each placement indicator
local controls $picontrols i.year_prop_p $gradcontrols i.g_d_R3_mid i.g_d_ProgramSize_Q_p $bacontrols i.baccategory_cd
foreach outcome in tot_pub {
	foreach placement in research_p acadres_p postdoc_p {
		quietly reg `outcome' $ThresDDD `controls' if cem_matched == 1 & `placement' == 1, cluster(pi_id)
		*outreg2 using "$dir/Tables/PanelDDD_Placement.xls", append dec(3) e(r2_a) ctitle(OLS DDD, `outcome', Full Timeframe) label addtext(Clustered by PI, Yes, Sample, CEM `placement' Full Timeframe, Cutoff, Threshold Year Omitted, Controls, Yes)
	}
}
* Stratified DDs   
* Minimal regression that appears to exist only so the (commented-out) outreg2 call can reset the output file
quietly reg tot_pub award_p
	*outreg2 using "$dir/Tables/Strat_Placement.xls", replace 
* Shared covariates and the sample of female awardees plus male honorable mentions
local controls $picontrols i.year_prop_p $gradcontrols i.g_d_R3_mid i.g_d_ProgramSize_Q_p $bacontrols i.baccategory_cd
local fa_or_mhm ((female_p == 1 & award_p == 1) | (female_p == 0 & award_p == 0))
foreach outcome in tot_pub {
	foreach placement in research_p acadres_p postdoc_p {
		* Within Gender
		quietly reg `outcome' $genderthresdd `controls' if cem_matched == 1 & female_p == 1 & `placement' == 1, cluster(pi_id)
		*outreg2 using "$dir/Tables/Strat_Placement.xls", append dec(3) e(r2_a) ctitle(OLS Female DD, `outcome', Full Timeframe) label addtext(Clustered by PI, Yes, Sample, CEM Female `placement' Full Timeframe, Cutoff, Threshold Year Omitted, Controls, Yes)
		quietly reg `outcome' $genderthresdd `controls' if cem_matched == 1 & female_p == 0 & `placement' == 1, cluster(pi_id)
		*outreg2 using "$dir/Tables/Strat_Placement.xls", append dec(3) e(r2_a) ctitle(OLS Male DD, `outcome', Full Timeframe) label addtext(Clustered by PI, Yes, Sample, CEM Male `placement' Full Timeframe, Cutoff, Threshold Year Omitted, Controls, Yes)
		* Within Award Status
		quietly reg `outcome' $awardthresdd `controls' if cem_matched == 1 & award_p == 1 & `placement' == 1, cluster(pi_id)
		*outreg2 using "$dir/Tables/Strat_Placement.xls", append dec(3) e(r2_a) ctitle(OLS Awardee DD, `outcome', Full Timeframe) label addtext(Clustered by PI, Yes, Sample, CEM Awardee `placement' Full Timeframe, Cutoff, Threshold Year Omitted, Controls, Yes)
		quietly reg `outcome' $awardthresdd `controls' if cem_matched == 1 & award_p == 0 & `placement' == 1, cluster(pi_id)
		*outreg2 using "$dir/Tables/Strat_Placement.xls", append dec(3) e(r2_a) ctitle(OLS HM DD, `outcome', Full Timeframe) label addtext(Clustered by PI, Yes, Sample, CEM HM `placement' Full Timeframe, Cutoff, Threshold Year Omitted, Controls, Yes)
		* Female Awardees versus Male Honorable Mentions
		quietly reg `outcome' award_female `controls' if cem_matched == 1 & `fa_or_mhm' & `placement' == 1, cluster(pi_id)
		*outreg2 using "$dir/Tables/Strat_Placement.xls", append dec(3) e(r2_a) ctitle(OLS FA/MHM DD, `outcome', Full Timeframe) label addtext(Clustered by PI, Yes, Sample, CEM Awardee `placement' Full Timeframe, Cutoff, None, Controls, Yes)	
	}
}
	
********************************************************************************
********************************************************************************

*** Open Part 3 Long-Form Data File (Part3_LongData)
** Set Directory Path: define global dir before uncommenting any export line; Part 6 exports write to "$dir/Tables"

********************************************************************************

*** Part 6: Threshold Analysis 

** Globals
* Control variable lists used by the threshold and cutoff-search regressions in this part
global picontrols field_2_p field_3_p prior_pub_dum advmatch
global gradcontrols g_d_uni_public g_d_avg_pubs_per_fac_p nrc_female_fac_pct ///
    g_d_grfp_total2yr_p nrc_avg_GRE g_flagship_p g_landgrnt_p
global bacontrols bac_public bac_US_domestic

** Table A3: Threshold Estimation Results on Year of Effect
* Threshold regression of tot_pub with year as the threshold variable; the award_p x female_p
* terms are the region-varying regressors and the control lists enter as ordinary regressors.
local controls $picontrols i.year_prop_p $gradcontrols i.g_d_R3_mid i.g_d_ProgramSize_Q_p $bacontrols i.baccategory_cd
* Full Sample
threshold tot_pub `controls' , regionvars(award_p##female_p) threshvar(year)
	*outreg2 using "$dir/Tables/resultsthreshold.xls", replace ctitle("full")
* CEM Sample
threshold tot_pub `controls' if cem_matched == 1, regionvars(award_p##female_p ) threshvar(year)
	*outreg2 using "$dir/Tables/resultsthreshold.xls", append ctitle("cem")

** Table A4: Machine Learning Estimation Results
* Machine Learning
* Regression that appears to exist only so the (commented-out) outreg2 call can reset resultsml_full.xls
reg tot_pub i.year
	*outreg2 using "$dir/Tables/resultsml_full.xls", replace 
* For each candidate cutoff year i (6 through 15), build a post-period indicator and fit the fully
* interacted postpd x award_p x female_p model on even-numbered pi_id ("training") and on odd-numbered
* pi_id ("test"), first for all observations and then for the CEM-matched sample.
forvalues i=6(1)15 {
* Stata treats a missing value as larger than any number, so an unguarded year > `i' would code
* observations with missing year as post-period; the guard leaves postpd missing for them instead,
* which drops them from the regressions below.
gen byte postpd = year > `i' if !missing(year)
reg tot_pub $picontrols i.year_prop_p $gradcontrols i.g_d_R3_mid i.g_d_ProgramSize_Q_p $bacontrols i.baccategory_cd postpd##award_p##female_p   if mod(pi_id,2)==0 
	*outreg2 using "$dir/Tables/resultsml_full.xls", append ctitle("training")
reg tot_pub $picontrols i.year_prop_p $gradcontrols i.g_d_R3_mid i.g_d_ProgramSize_Q_p $bacontrols i.baccategory_cd postpd##award_p##female_p  if mod(pi_id,2)==1
	*outreg2 using "$dir/Tables/resultsml_full.xls", append ctitle("test")
reg tot_pub $picontrols i.year_prop_p $gradcontrols i.g_d_R3_mid i.g_d_ProgramSize_Q_p $bacontrols i.baccategory_cd postpd##award_p##female_p   if mod(pi_id,2)==0 & cem_matched == 1
	*outreg2 using "$dir/Tables/resultsml_cem.xls", append ctitle("training")
reg tot_pub $picontrols i.year_prop_p $gradcontrols i.g_d_R3_mid i.g_d_ProgramSize_Q_p $bacontrols i.baccategory_cd postpd##award_p##female_p   if mod(pi_id,2)==1 & cem_matched == 1
	*outreg2 using "$dir/Tables/resultsml_cem.xls", append ctitle("test")
* Remove the indicator so the next cutoff can recreate it
drop postpd
} 

********************************************************************************
*************************************END****************************************
********************************************************************************
